#---------------------------------------------------------------------------------------------------
# set up
#---------------------------------------------------------------------------------------------------
# Start from an empty workspace and release memory held by earlier runs.
# NOTE(review): rm(list = ls()) wipes the caller's global environment; kept
# because the rest of the script assumes a clean session.
rm(list = ls())
invisible(gc())

# Packages: install pacman on first use, then let it install/attach the rest.
if (!require("pacman")) {
  install.packages("pacman")
}
pacman::p_load(tidyverse, sf, sp, httr, mapview)

# Session options:
#   dplyr.summarise.inform = FALSE  silences the summarise() grouping message
#   scipen = 999                    avoids scientific notation when numbers are
#                                   converted to text
options(dplyr.summarise.inform = FALSE, scipen = 999)
# Project folders, all resolved against the current working directory.
wd <- getwd()
# Location of the file holding the API keys (hard-coded drive).
api <- "Z:/"
data_input <- file.path(wd, "data_input")
data_output <- file.path(wd, "data_output")
output <- file.path(wd, "output")

# Create the folders only when they are missing, so re-running the script does
# not emit "already exists" warnings while genuine failures still surface.
invisible(lapply(
  c(data_input, data_output, output),
  function(dir_path) {
    if (!dir.exists(dir_path)) {
      dir.create(dir_path)
    }
  }
))
# Today's date as "YYYYMMDD"; used below to filter calendar_dates on `date`.
today <- format(Sys.Date(), "%Y%m%d")

## Read API keys
# The key file is expected to contain a line "trafiklab_gtfsstatik: <key>".
api_fil <- read_file(paste0(api, "api"))
if (!grepl("trafiklab_gtfsstatik: ", api_fil, fixed = TRUE)) {
  warning("No 'trafiklab_gtfsstatik: ' entry found in ", paste0(api, "api"),
          call. = FALSE)
}
# Drop everything up to and including the label, then everything from the first
# line break after the key. Accepting both CR and LF makes this work for
# Windows (CRLF) and Unix (LF) key files alike; the previous pattern only
# stopped at CR and otherwise left the rest of the file in the key.
trafiklab_key <- gsub('^.*trafiklab_gtfsstatik: \\s*|\\s*[\r\n].*$', "", api_fil)

# url for GTFS
url <- paste0("https://opendata.samtrafiken.se/gtfs/ul/ul.zip?key=", trafiklab_key)
#---------------------------------------------------------------------------------------------------
# load data
#---------------------------------------------------------------------------------------------------
# Read one GTFS table (<table>.txt) from the unpacked feed folder.
#
# The file is comma separated with "." as decimal mark, so read.csv() is used
# (read.csv2() assumes "," as decimal mark and left coordinates as text).
# All columns are first read as text; key columns are then kept as text so that
# long all-digit identifiers are not converted to double, where values above
# 2^53 lose their last digits and distinct ids can collide in joins. Every
# other column gets R's usual type conversion.
#
# table: table name without extension, e.g. "stops".
# dir:   folder holding the unpacked GTFS files.
# Returns a data.frame. Stops with an error if the file does not exist.
read_gtfs_table <- function(table,
                            dir = file.path(data_input, "trafiklab_ul")) {
  path <- file.path(dir, paste0(table, ".txt"))
  if (!file.exists(path)) {
    stop("GTFS file not found: ", path, call. = FALSE)
  }
  tbl <- read.csv(path, encoding = "UTF-8", stringsAsFactors = FALSE,
                  colClasses = "character")
  key_cols <- c("agency_id", "route_id", "service_id", "trip_id",
                "shape_id", "stop_id", "parent_station")
  other_cols <- setdiff(names(tbl), key_cols)
  if (length(other_cols) > 0) {
    tbl[other_cols] <- lapply(tbl[other_cols], type.convert, as.is = TRUE)
  }
  tbl
}

routes <- read_gtfs_table("routes")
stops <- read_gtfs_table("stops")
stop_times <- read_gtfs_table("stop_times")
trips <- read_gtfs_table("trips")
calendar_dates <- read_gtfs_table("calendar_dates")
### Create filter variables
# service_id values that calendar_dates lists for today's date
service_id_inklud <- calendar_dates %>%
  filter(date == today) %>%
  pull(service_id)

# trip_id values of the trips that belong to those services
trips_inklud <- trips %>%
  filter(service_id %in% service_id_inklud) %>%
  pull(trip_id)
#---------------------------------------------------------------------------------------------------
# Merge gtfs tables
#---------------------------------------------------------------------------------------------------
# One row per stop event of today's trips, enriched with trip, stop and route
# attributes.
gtfs <- stop_times %>%
  # Drop rows referring to other dates first: the result is the same as
  # filtering after the joins, but the joins then only process today's rows.
  filter(trip_id %in% trips_inklud) %>%
  left_join(trips, by = "trip_id") %>%
  left_join(stops, by = "stop_id") %>%
  left_join(routes, by = "route_id") %>%
  # Characters 8-13 of stop_id are used as the stop-level id.
  # NOTE(review): presumably the stop number shared by all stop points of one
  # stop - confirm against the feed's id format.
  mutate(hpl_id = substr(stop_id, 8, 13)) %>%
  # Remove duplicates: keep the first row per (arrival, departure, stop point).
  distinct(arrival_time, departure_time, stop_id, .keep_all = TRUE)
#---------------------------------------------------------------------------------------------------
# Data processing
#---------------------------------------------------------------------------------------------------
# Number of rows in gtfs (stop events) per stop.
antal_departure <- summarise(group_by(gtfs, hpl_id), antal_dep = n())

# Number of distinct route_short_name values per stop.
antal_linjer <- gtfs %>%
  group_by(hpl_id) %>%
  summarise(antal_linjer = n_distinct(route_short_name))

## Timetable data is at stop-point level. Take the mean to get one coordinate
## per stop. The mean runs over all rows of gtfs, so each stop point is
## weighted by its number of stop events.
hpl_koord <- gtfs %>%
  group_by(hpl_id, stop_name) %>%
  summarise(
    lat = round(mean(as.numeric(stop_lat)), 5),
    lon = round(mean(as.numeric(stop_lon)), 5),
    .groups = "drop"
  ) %>%
  left_join(antal_departure, by = "hpl_id") %>%
  left_join(antal_linjer, by = "hpl_id") %>%
  # Natural log of the departure count, used for the map colour scale.
  mutate(antal_dep_log = log(as.numeric(antal_dep)))
# Build point geometries from the per-stop coordinates (lon = x, lat = y).
xy_gtfs <- hpl_koord[, c("lon", "lat")]
spdf <- SpatialPointsDataFrame(coords = xy_gtfs, data = hpl_koord)

# Convert to an sf object and declare the coordinates as EPSG:4326.
spdf1 <- st_set_crs(st_as_sf(spdf), 4326)

# Number of unique lines per stop.
# (Original heading: "per weekday"; the data are filtered on the run date.)
mapview(spdf1, zcol = "antal_linjer")

# Number of departures per stop, on a log scale.
# (Original heading: "per weekday"; the data are filtered on the run date.)
mapview(spdf1, zcol = "antal_dep_log")